package sis.ra.crawlingInfobox;

import java.util.ArrayList;

import sis.ra.utility.Utils;


/**
 * Extracts product names from the bulleted lines of a Wikipedia page.
 *
 * The page source is requested through the MediaWiki query API, redirects are
 * followed, and every line that starts with "*" contributes at most one name:
 * the first bold span ('''...''') if the line has one, otherwise the first
 * wiki link ([[...]]).
 */
public class crawlingListOfProducts {

	/** Text that precedes the target title in the response for a redirect page. */
	private static final String REDIRECT_MARKER="<revisions><rev>#REDIRECT [[";

	/**
	 * Revision element returned for the title "API". The query URL always lists
	 * "API" next to the requested page, so this stub is removed before the
	 * redirect check to keep it from being picked up there.
	 */
	private static final String API_REDIRECT="<revisions><rev>#REDIRECT [[Application programming interface]]{{R from abbreviation}}</rev></revisions>";

	/** Upper bound on chained redirects, so a redirect cycle cannot recurse forever. */
	private static final int MAX_REDIRECTS=10;

	/**
	 * Returns the distinct product names found on the given page, in order of
	 * first appearance.
	 *
	 * @param pagename page title; spaces are sent as "%20", nothing else is escaped
	 * @return the names found; empty when nothing could be fetched or parsed,
	 *         or when more than {@code MAX_REDIRECTS} redirects were chained
	 */
	public static ArrayList<String> getListProducts(String pagename)
	{
		return fetchProducts(pagename,0);
	}

	/** Fetches one page and either follows its redirect or parses its bullet lines. */
	private static ArrayList<String> fetchProducts(String pagename, int redirectsFollowed)
	{
		String wikipage="http://en.wikipedia.org/w/api.php?action=query&prop=revisions&titles=API|"+pagename.replace(" ", "%20")+"&rvprop=content&format=xml";
		String wikicontent=Utils.fetchContentfrURL(wikipage);
		// NOTE(review): Utils.fetchContentfrURL is not visible here; guard in case
		// it signals a failed fetch by returning null.
		if (wikicontent==null)
		{
			return new ArrayList<String>();
		}
		wikicontent=wikicontent.replace(API_REDIRECT, "");

		String target=findRedirectTarget(wikicontent,pagename);
		if (target!=null)
		{
			if (redirectsFollowed>=MAX_REDIRECTS)
			{
				return new ArrayList<String>();
			}
			return fetchProducts(target,redirectsFollowed+1);
		}
		return parseProducts(wikicontent);
	}

	/** Returns the redirect target named in the response, or null if there is no well-formed redirect. */
	private static String findRedirectTarget(String wikicontent, String pagename)
	{
		if (!wikicontent.contains(REDIRECT_MARKER)||!wikicontent.contains(pagename))
		{
			return null;
		}
		// Only look at the response from the first mention of the requested page onwards.
		String rest=wikicontent.substring(wikicontent.indexOf(pagename));
		int marker=rest.indexOf(REDIRECT_MARKER);
		if (marker>=0)
		{
			rest=rest.substring(marker+REDIRECT_MARKER.length());
		}
		int close=rest.indexOf("]]");
		if (close<0)
		{
			// Unterminated link: treat the page as ordinary content instead of failing.
			return null;
		}
		return rest.substring(0,close);
	}

	/** Collects one name per bullet line, skipping empty names and duplicates. */
	private static ArrayList<String> parseProducts(String wikicontent)
	{
		ArrayList<String> products=new ArrayList<String>();
		for (String line : wikicontent.split("\n"))
		{
			if (!line.trim().startsWith("*"))
			{
				continue;
			}
			String product=extractBoldName(line);
			if (product==null)
			{
				product=extractLinkTarget(line);
			}
			if (product==null||product.length()<1)
			{
				continue;
			}
			if (!products.contains(product))
			{
				products.add(product);
			}
		}
		return products;
	}

	/**
	 * Returns the text of the first '''bold''' span with link brackets removed,
	 * or null when the line has no complete bold span.
	 */
	private static String extractBoldName(String line)
	{
		int open=line.indexOf("'''");
		if (open<0)
		{
			return null;
		}
		int start=open+3;
		int end=line.indexOf("'''",start);
		if (end<0)
		{
			return null;
		}
		return line.substring(start,end).replace("[[", "").replace("]]","");
	}

	/**
	 * Returns the trimmed text of the first [[...]] link, or null when the line
	 * has no "[[" followed by a "]]".
	 */
	private static String extractLinkTarget(String line)
	{
		int open=line.indexOf("[[");
		if (open<0)
		{
			return null;
		}
		int start=open+2;
		// Search for the closing brackets after the opening ones, so a stray
		// "]]" earlier on the line cannot produce an inverted range.
		int end=line.indexOf("]]",start);
		if (end<0)
		{
			return null;
		}
		return line.substring(start,end).trim();
	}

	/**
	 * Joins the names into one string, each name followed by " ; ".
	 *
	 * @param products names to join
	 * @return the joined text; empty for an empty list
	 */
	public static String toString(ArrayList<String> products)
	{
		StringBuilder joined=new StringBuilder();
		for (String product : products)
		{
			joined.append(product).append(" ; ");
		}
		return joined.toString();
	}

	/** Prints the names found on a sample list page. */
	public static void main(String[] args) {
		String company="List_of_Yahoo!-owned_sites_and_services";
		ArrayList<String> products=getListProducts(company);
		String pro=toString(products);
		System.out.println(pro);
	}

}
